data & libraries

load(here::here("tmpdata/dfinfo.rda"))
load(here::here("tmpdata/sel1.data.l.anno.rda"))
df.all = do.call(rbind,
                 lapply(1:length(sel1.data.l.anno), function(i) {
                   df = sel1.data.l.anno[[i]]
                   df$rf = names(sel1.data.l.anno)[[i]]
                   
                   df2 = df %>%
                     mutate(is.pair = ifelse(is.na(name), 0, 1)) %>%
                     mutate(Z = (score_gplmDCA - mean(score_gplmDCA)) /sd(score_gplmDCA))
                   return(df2)
                 }))

funcs

library(RColorBrewer)
myColors <- brewer.pal(4,"Set1")
names(myColors) <- c("TP","TN","FP","FN")
col_scale <- scale_colour_manual(name = "type",values = myColors)


plot_tpfpfn = function(df) {
  L = max(df$V2) / 2
  len = max(df$V2)
  tbl.anno =
    df %>%
    mutate(is.pair = ifelse(is.na(name), 0, 1)) %>%
    # mutate(is.pred = ifelse(rank(-score_gplmDCA) < L, 1, 0)) %>%
    mutate(Z=(score_gplmDCA-mean(score_gplmDCA))/sd(score_gplmDCA),
           is.pred = Z > 3) %>%
    mutate(
      type = ifelse(is.pred > 0 & is.pair > 0, "TP", NA),
      type = ifelse(is.pred > 0 & is.pair == 0, "FP", type),
      type = ifelse(is.pred == 0 & is.pair > 0, "FN", type),
      type = ifelse(is.pred == 0 & is.pair == 0, "TN", type)
    )
  
  
  gg = tbl.anno %>%
    filter(is.pred > 0 | is.wc > 0) %>%
    ggplot() +
    geom_point(aes(x = V2, y = V1, color = type)) +
    coord_fixed()+
    xlim(1,len)+
    # ylim(1,len)+
    col_scale+scale_y_reverse(limits = c(len,1))
  
  gg
}

exp

df = sel1.data.l.anno$RF00032

plot_tpfpfn(df)

# df.all.anno = 

df.all.summ1 = 
df.all %>% 
  group_by(rf) %>%
  mutate(p_wc = sum(is.wc),
         p_nwc = sum(is.nwc),
         p_pair = sum(is.pair)) %>%
  filter(Z > 3) %>%
  summarise(sen_wc = sum(is.wc)/max(p_wc),
            sen_nwc = sum(is.nwc)/max(p_nwc),
            sen_pair = sum(is.pair)/max(p_pair)) %>%
  left_join(dfinfo) %>%
  arrange(meff)
## Joining, by = "rf"
# # df.all.summ1.sum_again =
#   df.all.summ1 %>%
#   summarise(n_gt80 = sum(f_pair > 0.5))
table(df.all.summ1$sen_wc > 0.75)
## 
## FALSE  TRUE 
##    20    26
df.all %>%
  ggplot(aes(x=is.wc>0,y=score_gplmDCA))+geom_boxplot()

contact maps

ggs = lapply(1:length(sel1.data.l.anno), function(i){
  df = sel1.data.l.anno[[i]]
  gg =plot_tpfpfn(df)
  
  rf = names(sel1.data.l.anno)[[i]]
  
  meff = dfinfo$meff[which(dfinfo$rf == rf)]
  
  L = dfinfo$L[which(dfinfo$rf == rf)]
  
  gg = gg+ ggtitle(paste0(rf, " meff: ", meff, " NT: ", L))
  return(gg)
})



# gridExtra::marrangeGrob(grobs = ggs, nrow=2,ncol=2)
plot_tpr = function(pred, label){
  df = data.frame(pred=pred, label = label) %>%
    arrange(desc(pred)) %>%
    mutate(rank_it= rank(-pred),
           TP =cumsum(label)/ rank_it) 
  return(df)
}
tmpdf = sel1.data.l.anno$RF01734
df = plot_tpr(tmpdf$score_gplmDCA,tmpdf$is.wc)
plot_tpfpfn(tmpdf)

testit2 = function(preds, label, L){
  P=sum(label)
  rslt = numeric(ncol(preds))
  for (i in 1:ncol(preds)){
    tmpdf = data.frame(s=preds[,i], l =label)
    rslt[i] = sum(tmpdf %>% arrange(desc(s)) %>% slice(1:round(L))%>% pull(l))/P
  }
  names(rslt) = names(preds)
  return(rslt)
}
plot_contacts = function(df){
  L = max(df$V2) /2
  len = max(df$V2)
  
  
  tbl.anno =
    df %>%
    mutate(is.contact_8 = ifelse(abs(dist) < 12, 1,0)) %>%
    mutate(is.pair = ifelse(is.na(name), 0, 1)) %>%
    mutate(is.pred = ifelse(rank(-score_gplmDCA) < L, 1, 0)) %>%
    mutate(
      type = ifelse(is.pred > 0 & is.contact_8 > 0, "TP", NA),
      type = ifelse(is.pred > 0 & is.contact_8 == 0, "FP", type),
      type = ifelse(is.pred == 0 & is.contact_8 > 0, "FN", type),
      type = ifelse(is.pred == 0 & is.contact_8 == 0, "TN", type)
    )
  

  gg = tbl.anno %>%
    filter(is.contact_8>0) %>%
    ggplot() +
    geom_point(aes(x = V1, y = V2),
               alpha = 0.5,
               color = "grey",
               size = 2) +
    geom_point(aes(x = V2, y = V1),
               alpha = 0.5,
               color = "grey",
               size = 2)+
    geom_point(data= tbl.anno %>% filter(is.pred>0),
               aes(x=V2,y=V1,color=type))+
    coord_fixed()+scale_y_reverse()+col_scale
    gg
}
df = sel1.data.l.anno$RF02012

plot_contacts(df)

plot_tpfpfn(df)

ggs2 = lapply(1:length(sel1.data.l.anno), function(i){
  df = sel1.data.l.anno[[i]]
  gg =plot_contacts(df)
  
  rf = names(sel1.data.l.anno)[[i]]
  
  meff = dfinfo$meff[which(dfinfo$rf == rf)]
  
  L = dfinfo$L[which(dfinfo$rf == rf)]
  
  gg = gg+ ggtitle(paste0(rf, " meff: ", meff, " NT: ", L))
  return(gg)
})



# gridExtra::marrangeGrob(grobs = ggs, nrow=2,ncol=2)
ggall = c(ggs,ggs2)
for (i in 1:46){
  grid.arrange(ggs[[i]],ggs2[[i]],ncol = 2)
}